Wang Haihua
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('classic')
from sklearn.datasets import load_iris
iris = load_iris()
df_iris = pd.DataFrame(data=iris['data'],columns=iris['feature_names'])
df_iris['Type'] = iris['target']
df_iris.head()
|   | sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) | Type |
|---|---|---|---|---|---|
| 0 | 5.1 | 3.5 | 1.4 | 0.2 | 0 |
| 1 | 4.9 | 3.0 | 1.4 | 0.2 | 0 |
| 2 | 4.7 | 3.2 | 1.3 | 0.2 | 0 |
| 3 | 4.6 | 3.1 | 1.5 | 0.2 | 0 |
| 4 | 5.0 | 3.6 | 1.4 | 0.2 | 0 |
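A quick class-balance check (a small added cell, not part of the original output): the iris dataset is balanced across the three types.

df_iris['Type'].value_counts()   # added check: expect 50 samples for each of types 0, 1, 2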
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(iris['data'],iris['target'],test_size=0.3,random_state=1)
from sklearn.linear_model import LogisticRegression
logis = LogisticRegression()
logis.fit(X_train,y_train)
logis.score(X_test,y_test)
0.9777777777777777
test_data = [5.9, 3.0, 4.2, 1.5]   # hypothetical single sample: sepal length, sepal width, petal length, petal width (cm)
logis.predict(np.array(test_data).reshape(1,-1))
array([1])
logis.predict(X_test)
array([0, 1, 1, 0, 2, 1, 2, 0, 0, 2, 1, 0, 2, 1, 1, 0, 1, 1, 0, 0, 1, 1, 2, 0, 2, 1, 0, 0, 1, 2, 1, 2, 1, 2, 2, 0, 1, 0, 1, 2, 2, 0, 2, 2, 1])
y_test
array([0, 1, 1, 0, 2, 1, 2, 0, 0, 2, 1, 0, 2, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 2, 1, 0, 0, 1, 2, 1, 2, 1, 2, 2, 0, 1, 0, 1, 2, 2, 0, 2, 2, 1])
from sklearn.metrics import confusion_matrix
import seaborn as sn
sn.heatmap(confusion_matrix(y_test,logis.predict(X_test)),annot=True)
<AxesSubplot:>
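Beyond the heatmap, per-class precision and recall can be printed; the following is a small added example using the fitted logis model.

from sklearn.metrics import classification_report
# Added example: per-class precision/recall/F1 for the logistic regression predictions.
print(classification_report(y_test, logis.predict(X_test), target_names=iris['target_names']))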
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
model_names = ['Logistic Regression','KNN','SVM','Decision Tree','Neural Network','Naive Bayes','Random Forest','AdaBoost']
models = [LogisticRegression(),KNeighborsClassifier(),SVC(),DecisionTreeClassifier(),MLPClassifier(max_iter=1000),GaussianNB(),RandomForestClassifier(),AdaBoostClassifier()]
score_list = []
for model in models:
    model.fit(X_train, y_train)
    score_list.append(model.score(X_test, y_test))
plt.figure(figsize=(20,7))
plt.barh(model_names,score_list)
<BarContainer object of 8 artists>
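The same scores can also be listed as a sorted table; a small added view built from the model_names and score_list defined above.

# Added view: test accuracy per model, highest first.
pd.Series(score_list, index=model_names).sort_values(ascending=False)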
df_iris.groupby('Type').median().plot.bar()
<AxesSubplot:xlabel='Type'>
df_iris.groupby('Type').median()
| Type | sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) |
|---|---|---|---|---|
| 0 | 5.0 | 3.4 | 1.50 | 0.2 |
| 1 | 5.9 | 2.8 | 4.35 | 1.3 |
| 2 | 6.5 | 3.0 | 5.55 | 2.0 |
df_iris.groupby('Type').median().T.plot.bar()
<AxesSubplot:>
from sklearn.cluster import KMeans
km = KMeans(n_clusters=3)
km.fit(X_train)
KMeans(n_clusters=3)
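Here n_clusters=3 matches the known number of species. As an illustrative sketch (not in the original), the usual elbow check plots the inertia for a range of cluster counts:

# Illustrative elbow check: within-cluster sum of squares (inertia) for k = 1..8.
inertias = [KMeans(n_clusters=k).fit(X_train).inertia_ for k in range(1, 9)]
plt.plot(range(1, 9), inertias, marker='o')
plt.xlabel('number of clusters k')
plt.ylabel('inertia')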
km.predict(X_test)
array([1, 2, 2, 1, 0, 2, 0, 1, 1, 0, 2, 1, 0, 2, 2, 1, 2, 2, 1, 1, 2, 2, 0, 1, 0, 2, 1, 1, 2, 2, 2, 0, 2, 0, 2, 1, 2, 1, 2, 0, 0, 1, 2, 0, 2])
df_km_train = pd.DataFrame(data=X_train,columns=iris['feature_names'])
df_km_train['Type'] = km.predict(X_train)
df_km_train['Train'] = 1
df_km_test = pd.DataFrame(data=X_test,columns=iris['feature_names'])
df_km_test['Type'] = km.predict(X_test)
df_km_test['Train'] = 0
df_km = pd.concat([df_km_train,df_km_test],axis=0)
df_km
|   | sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) | Type | Train |
|---|---|---|---|---|---|---|
| 0 | 7.7 | 2.6 | 6.9 | 2.3 | 0 | 1 |
| 1 | 5.7 | 3.8 | 1.7 | 0.3 | 1 | 1 |
| 2 | 5.0 | 3.6 | 1.4 | 0.2 | 1 | 1 |
| 3 | 4.8 | 3.0 | 1.4 | 0.3 | 1 | 1 |
| 4 | 5.2 | 2.7 | 3.9 | 1.4 | 2 | 1 |
| ... | ... | ... | ... | ... | ... | ... |
| 40 | 6.8 | 3.0 | 5.5 | 2.1 | 0 | 0 |
| 41 | 5.1 | 3.5 | 1.4 | 0.3 | 1 | 0 |
| 42 | 6.0 | 2.2 | 5.0 | 1.5 | 2 | 0 |
| 43 | 6.3 | 2.9 | 5.6 | 1.8 | 0 | 0 |
| 44 | 6.6 | 2.9 | 4.6 | 1.3 | 2 | 0 |

150 rows × 6 columns
df_km.groupby(['Type','Train']).mean().iloc[:,:-1].plot.bar()
<AxesSubplot:xlabel='Type,Train'>
km_label = KMeans(n_clusters=3)
km_label.fit(X_train, y_train)   # y_train is ignored here: KMeans is unsupervised
km_label.predict(X_test)
array([1, 0, 0, 1, 2, 0, 2, 1, 1, 2, 0, 1, 2, 0, 0, 1, 0, 0, 1, 1, 0, 0, 2, 1, 2, 0, 1, 1, 0, 0, 0, 2, 0, 2, 0, 1, 0, 1, 0, 2, 2, 1, 0, 2, 0])
km.predict(X_test)
array([1, 2, 2, 1, 0, 2, 0, 1, 1, 0, 2, 1, 0, 2, 2, 1, 2, 2, 1, 1, 2, 2, 0, 1, 0, 2, 1, 1, 2, 2, 2, 0, 2, 0, 2, 1, 2, 1, 2, 0, 0, 1, 2, 0, 2])
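The two runs assign different cluster ids but the same partition up to relabeling; a permutation-invariant measure such as the adjusted Rand index makes the comparison explicit (a small added check using the fitted models above).

from sklearn.metrics import adjusted_rand_score
# Added check: 1.0 means the two clusterings agree up to a relabeling of cluster ids.
print(adjusted_rand_score(km.predict(X_test), km_label.predict(X_test)))
# Agreement between the KMeans clusters and the true species labels.
print(adjusted_rand_score(y_test, km.predict(X_test)))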
import numpy as np
from matplotlib import pyplot as plt
from scipy.cluster.hierarchy import dendrogram
from sklearn.datasets import load_iris
from sklearn.cluster import AgglomerativeClustering
def plot_dendrogram(model, **kwargs):
    # Create linkage matrix and then plot the dendrogram

    # create the counts of samples under each node
    counts = np.zeros(model.children_.shape[0])
    n_samples = len(model.labels_)
    for i, merge in enumerate(model.children_):
        current_count = 0
        for child_idx in merge:
            if child_idx < n_samples:
                current_count += 1  # leaf node
            else:
                current_count += counts[child_idx - n_samples]
        counts[i] = current_count

    linkage_matrix = np.column_stack([model.children_, model.distances_,
                                      counts]).astype(float)

    # Plot the corresponding dendrogram
    dendrogram(linkage_matrix, **kwargs)
iris = load_iris()
X = iris.data
# setting distance_threshold=0 ensures we compute the full tree.
model = AgglomerativeClustering(distance_threshold=0, n_clusters=None)
model = model.fit(X)
plt.title('Hierarchical Clustering Dendrogram')
# plot the top three levels of the dendrogram
plot_dendrogram(model, truncate_mode='level', p=6)
plt.xlabel("Number of points in node (or index of point if no parenthesis).")
plt.show()
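To cut the same hierarchy into a fixed number of flat clusters, n_clusters can be set instead of distance_threshold; the cross-tabulation against the species labels below is an added illustration.

# Added sketch: flat 3-cluster cut of the hierarchy, compared with the true species labels.
agg = AgglomerativeClustering(n_clusters=3)
agg_labels = agg.fit_predict(X)
pd.crosstab(iris.target, agg_labels)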
from sklearn.decomposition import PCA
pca = PCA()
pca.fit(X_train)
PCA()
pca.explained_variance_
array([4.36909984, 0.22100548, 0.09049788, 0.0206056 ])
pca.explained_variance_ratio_
array([0.92935669, 0.04701035, 0.01924992, 0.00438304])
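The first component alone explains about 93% of the variance; the cumulative shares make this explicit (a small added line).

# Added: cumulative share of variance explained by the first k components.
np.cumsum(pca.explained_variance_ratio_)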
pca.components_
array([[ 0.36760197, -0.06649071,  0.85410056,  0.36188398],
       [ 0.63470116,  0.7468054 , -0.17634312, -0.09131939],
       [-0.58983791,  0.58428977,  0.06477864,  0.55362481],
       [-0.33780832,  0.31059   ,  0.48499389, -0.74444632]])
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()
lr.fit(X_train_pca[:,:1],y_train)
LogisticRegression()
lr.score(X_test_pca[:,:1],y_test)
0.9777777777777777
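Even a single principal component keeps the test accuracy close to the full-feature model. As an added sketch, the same fit can be repeated for each number of retained components:

# Added sketch: logistic regression accuracy using the first k principal components.
for k in range(1, 5):
    lr_k = LogisticRegression()
    lr_k.fit(X_train_pca[:, :k], y_train)
    print(k, 'components:', lr_k.score(X_test_pca[:, :k], y_test))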
df_pca = pd.DataFrame(X_train_pca[:,:2],columns=['Z1','Z2'])
df_pca['Type'] = y_train
df_pca
|   | Z1 | Z2 | Type |
|---|---|---|---|
| 0 | 3.829677 | 0.219859 | 2 |
| 1 | -2.150407 | 0.946246 | 0 |
| 2 | -2.686849 | 0.414629 | 0 |
| 3 | -2.684287 | -0.169526 | 0 |
| 4 | 0.016025 | -0.680996 | 1 |
| ... | ... | ... | ... |
| 100 | 1.474847 | -0.128888 | 2 |
| 101 | 1.941866 | 0.060690 | 2 |
| 102 | 1.323975 | -0.317661 | 1 |
| 103 | 2.354687 | 0.178675 | 2 |
| 104 | -2.759798 | 0.360291 | 0 |

105 rows × 3 columns
import seaborn as sn
sn.scatterplot(x='Z1',y='Z2',hue='Type',data=df_pca)
<AxesSubplot:xlabel='Z1', ylabel='Z2'>
sn.scatterplot(x='sepal length (cm)',y='sepal width (cm)',hue='Type',data=df_iris)
<AxesSubplot:xlabel='sepal length (cm)', ylabel='sepal width (cm)'>
df_iris
|   | sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) | Type |
|---|---|---|---|---|---|
| 0 | 5.1 | 3.5 | 1.4 | 0.2 | 0 |
| 1 | 4.9 | 3.0 | 1.4 | 0.2 | 0 |
| 2 | 4.7 | 3.2 | 1.3 | 0.2 | 0 |
| 3 | 4.6 | 3.1 | 1.5 | 0.2 | 0 |
| 4 | 5.0 | 3.6 | 1.4 | 0.2 | 0 |
| ... | ... | ... | ... | ... | ... |
| 145 | 6.7 | 3.0 | 5.2 | 2.3 | 2 |
| 146 | 6.3 | 2.5 | 5.0 | 1.9 | 2 |
| 147 | 6.5 | 3.0 | 5.2 | 2.0 | 2 |
| 148 | 6.2 | 3.4 | 5.4 | 2.3 | 2 |
| 149 | 5.9 | 3.0 | 5.1 | 1.8 | 2 |

150 rows × 5 columns